Prepare the Workspace

knitr::opts_chunk$set(warning=FALSE, message=FALSE, error = FALSE) # rmd options
rm(list = ls()); invisible(gc()) # cleaning






Control Block

# f, f, 0.25 looks nice.
allowmigrants <- F # OPTIONS: T, F 
allowsympatry <- F # OPTIONS: T, F
minoverperc <- 0 # remove pairs that do not have thermal overlap (anagenesis)
costvar <- "ele"






Packages & Prefs

options(scipen = 999) # turn off scientific notation
'%notin%' <- Negate('%in%')
require(ggplot2) # load packages
require(GGally)
require(viridis)
require(caper)
library(dplyr)
library(stringr)
library(maps)
library(ape)
library(EnvStats)
library(forecast)
require(nlme)
require(geodist)
require(letsR)
require(spdep)
require(spatialreg)
require(rnaturalearth)
require(rnaturalearthdata)
require(rgeos)
require(sf)
require(rgdal)
require(raster)
world <- ne_coastline(scale = "medium", returnclass = "sf")
vlog <- function(x){
   log( x + abs(min( x , na.rm = T)) + 1)
}

setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
exclusion <- read.csv(file = "exclusion_nonsimpatric_nonmigrant.csv")
exclusion$realm1red[is.na(exclusion$realm1red)] <- "NA" # NA is north america not R's NA value. fix.
exclusion$realm2green[is.na(exclusion$realm2green)] <- "NA"






Load Main Data

# main dataframe ---------------------------------
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Process_Cluster_Outputs/Data")
load(file =  "Pair_Barrier_Data_FEB2021.rdata")
mydata <- mypairdata; rm(mypairdata)
rownames(mydata) <- mydata$Species.1
mydatahold <- mydata






initial masks

# migration ---
if(allowmigrants == F){
  mydata <- mydata[which(mydata$Migration == 1.0),]
}

# patry
if(allowsympatry == F){
  mydata <- mydata[which(mydata[,paste0(costvar, "_c0")] > 0 ),] # doesnt matter if you use ele, mat, vart paths here, will be same answer
}

sort

mydata$uniquePairId == exclusion$mydata.uniquePairId
  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
 [91] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[151] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[211] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
x <- mydata[which(mydata$Species.1 == "Apteryx_owenii"),]
mydata <- mydata[order(match(mydata$uniquePairId,exclusion$mydata.uniquePairId)), ]
y <- mydata[which(mydata$Species.1 == "Apteryx_owenii"),]
# sum(y!=x, na.rm = T) # checks.
# head(mydata)
mydata$uniquePairId == exclusion$mydata.uniquePairId
  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
 [73] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[109] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[145] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[217] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[253] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
rm(x,y)
# # Basic range maps for all pairs (no paths)
# wdPAM <- "/Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data"
# setwd(wdPAM); load("cbPAM.rdata")
# setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Process_Cluster_Outputs/Data")
# pdf("pairmaps2.pdf", width = 19, height = 9.25)
# for (i in 1:nrow(mydata)){
# x <- cbPAM[,c("Longitude(x)","Latitude(y)",mydata$Species.1bl[i])]
# x <- as.data.frame(x[x[,3] == 1,])
# if(ncol(x) == 1) {
#   x <- t(x)
#   colnames(x) <- c("lon", "lat", "pres")
#   x <- as.data.frame(x)
# } else {
#   colnames(x) <- c("lon", "lat", "pres")
# }
# 
# y <- cbPAM[,c("Longitude(x)","Latitude(y)",mydata$Species.2bl[i])]
# y <- as.data.frame(y[y[,3] == 1,])
# if(ncol(y) == 1) {
#   y <- t(y)
#   colnames(y) <- c("lon", "lat", "pres")
#   y <- as.data.frame(y)
# 
# } else {
#   colnames(y) <- c("lon", "lat", "pres")
# }
# z <- ggplot(world)+
#     geom_sf() +
#     geom_point(data = x, aes(y=lat, x=lon), color = "red") +
#     geom_point(data = y, aes(y=lat, x=lon), color = "green") +
#     theme_bw() +
#     ggtitle(i)
# print(z)
# }
# print(i)
# dev.off()
mydata_exclusion <- cbind(mydata, exclusion)
save(mydata_exclusion, file = "mydata_exclusion.rdata")
mydata$realm1 <- exclusion$realm1red
mydata$realm2 <- exclusion$realm2green
mydata$realm <- paste0(mydata$realm1, mydata$realm2)
table(mydata$realm)

AAAA AAIM ATAT ATIM ATPA IMAA IMAT IMIM IMNT NANA NTAA NTNA NTNT OCOC PAAT PANA PAPA 
  30    1   45    3    1    3    3   27    1    4    1    1  135    2    3    2    6 
mydata$realm[mydata$realm == "AAIM"]; mydata$realm[mydata$realm == "IMAA"] <-  "AAIM"
[1] "AAIM"
mydata$realm[mydata$realm == "ATIM"]; mydata$realm[mydata$realm == "IMAT"] <-  "ATIM"
[1] "ATIM" "ATIM" "ATIM"
mydata$realm[mydata$realm == "ATPA"]; mydata$realm[mydata$realm == "PAAT"] <-  "ATPA"
[1] "ATPA"
mydata$realm[mydata$realm == "IMNT"]; mydata$realm[mydata$realm == "NTIM"] <-  "IMNT"
[1] "IMNT"
mydata$realm[mydata$realm == "NTAA"]; mydata$realm[mydata$realm == "AANT"] <-  "NTAA"
[1] "NTAA"
mydata$realm[mydata$realm == "NTNA"]; mydata$realm[mydata$realm == "NANT"] <-  "NTNA"
[1] "NTNA"
mydata$realm[mydata$realm == "PANA"]; mydata$realm[mydata$realm == "NAPA"] <-  "PANA"
[1] "PANA" "PANA"
mydata$realm <- as.factor(mydata$realm); mydata$realm <- relevel(mydata$realm, "NTNT")

mydata$landgap <- as.logical(exclusion$island)

mydata$cosmopolitan <- as.logical(exclusion$cosmopolitan)

mydata$new.old <- as.logical(exclusion$new.old)

rm(exclusion, mydata_exclusion)

Impose masks & do calculations

# filter cosmopolitan and new/old world species (there are relatively few after imposing previous masks.)
mydata <- mydata[which(mydata$cosmopolitan == FALSE & mydata$new.old == FALSE),]

# dependent variable: elevational barrier size ---
mydata$cost <- mydata[, paste0(costvar, "_c25")] 

# data filtering -----------------------
#  thermal overlap ---
mydata$MAT_overlap <- mydata[,paste0("MAT", "_ov_perc_smrnge")]
mydata <- mydata[mydata$MAT_overlap > minoverperc,]

# update sort order --------------------
mydata$sortorder <- seq(1:nrow(mydata))

# longitude ----------------------------
mydata$lon <- mydata[,paste0("lon_mean_pair_", costvar, "_c25")]

# latitude ----------------------------
mydata$lat <- mydata[,paste0("lat_mean_pair_", costvar, "_c25")]

# temperature breadth -----------------
mydata$tas_breadth <- mydata$tas_range # mean(mean(sp1 annual tas range -- one value per cell), mean(sp2 annual tas range -- one value per cell))

# mean annual temperature --------------
mydata$tas_position <- mydata$tas_mean # mean(mean(sp1 annual tas mean  -- one value per cell), mean(sp2 annual tas mean  -- one value per cell))

# precipitation breadth --------------- 
mydata$pcp_breadth <- mydata$pcp_range # mean(mean(sp1 annual pcp range  -- one value per cell), mean(sp2 annual pcp range  -- one value per cell))

# precipitation breadth --------------- 
mydata$pcp_position <- mydata$pcp_mean # mean(mean(sp1 annual pcp mean  -- one value per cell), mean(sp2 annual pcp mean  -- one value per cell))

# distance -----------------------------
mydata$distance <- mydata[,paste0("centroid_distance_",costvar,"_c25")]

# mountain mass ------------------------
mtns <- readOGR(dsn="~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/GMBA", layer="GMBA Mountain Inventory_v1.2-World", verbose = FALSE)
wdPAM <- "/Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data"
setwd(wdPAM); load("LonLat_BirdPAM_raster.rdata")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
mtns <- rasterize(mtns, LonLat_BirdPAM_raster)

load phylo and prune

# phylo ------------------------------------------
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/BirdTrees")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/BirdTrees inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
load(file = "BirdTrees.Rdata")
tree <- trees[[1]]; rm(trees) # pick tree (VF GET TREES FROM COONEY!!! and use MCC tree.) -- currently just using tree 1 here. 
tree <- drop.tip(tree, tree$tip.label[which(tree$tip.label %notin% mydata$Species.1)]) # initial name matching.
mydata <- mydata[which(mydata$Species.1 %in% tree$tip.label),]
mydata <- mydata[match(tree$tip.label, mydata$Species.1),]
sum(mydata$Species.1 != tree$tip.label) # sorted (but still specify form below for safety)
[1] 0

Save data frame for CB

setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
save(mydata, file = "ele_data_for_CB.rdata")
write.csv(mydata, file = "ele_data_for_CB.csv")

Neighbors for spatial analysis.

distm <- mydata[,c("lon", "lat")]
distm <- geodist(distm, measure = "geodesic")
rownames(distm) <- rownames(mydata)
colnames(distm) <- rownames(mydata)
basecols <- ncol(mydata)

# neightbors
coords<-cbind(mydata$lon, mydata$lat); coords<-as.matrix(coords) ; row.names(coords)<-rownames(mydata)
k1 <- knn2nb(knearneigh(coords, longlat = T))
knearneigh: identical points found
nb<- dnearneigh(coords,row.names = row.names(coords), d1=0,d2=max(unlist(nbdists(k1, coords, longlat = T))),longlat=T)

plots for transformations

for (i in c("cost", "lat", "lon","tas_breadth","tas_position","pcp_breadth","pcp_position", "mtn_mass", "dispersal_ability", "pair_age", "distance", "boundary_length", "MAT_overlap")) {
hist(mydata[, i], breaks = 50, main = i)
}

world plots

setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data")
The working directory was changed to /Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
pdf(file="variable_maps.pdf", width = 10, height = 7)
# One map per variable: pair locations on the world coastline, coloured by the
# variable. Plots are printed to whichever graphics device is currently open.
for (i in c("cost", "lat", "lon","tas_breadth","tas_position","pcp_breadth","pcp_position", "mtn_mass",  "water_buffering", "dispersal_ability", "pair_age", "distance", "boundary_length", "MAT_overlap", "realm", "landgap")) {
  if (i %in% c("cost")) {
    # cost is shown on the vlog scale
    myc <- vlog(mydata[, i])
    # Reorder the colour values together with the rows. Passing the unsorted
    # vector next to sorted data coloured each point with another pair's value.
    ord <- order(mydata[, i], decreasing = FALSE)
    plotdat <- mydata[ord, ]
    plotdat$myc <- myc[ord]
    x <- ggplot(world) +
      geom_sf() +
      geom_point(data = plotdat, aes(y = lat, x = lon, color = myc), alpha = 0.85) +
      ggtitle(paste0("ln ", i)) +
      scale_color_viridis()
    print(x)
  } else if (i %in% c("realm", "landgap")) {
    # these two get a discrete palette; rows are drawn in order of increasing cost
    x <- ggplot(world) +
      geom_sf() +
      geom_point(data = mydata[order(mydata[, "cost"], decreasing = FALSE), ], aes(y = lat, x = lon, color = get(i)), alpha = 0.85) +
      ggtitle(paste0(i)) +
      scale_color_viridis(discrete = TRUE)
    print(x)
  } else {
    # everything else gets the continuous palette; rows are drawn in order of increasing cost
    x <- ggplot(world) +
      geom_sf() +
      geom_point(data = mydata[order(mydata[, "cost"], decreasing = FALSE), ], aes(y = lat, x = lon, color = get(i)), alpha = 0.85) +
      ggtitle(i) +
      scale_color_viridis()
    print(x)
  }
}
dev.off()
null device 
          1 

# vector 1 added ---
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))) + V22 + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30, correlation = corPagel(0.99, phy = tree,fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m); cor(predict(m),scale(I(vlog(mydata$cost))))
Generalized least squares fit by REML
  Model: scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))) + V22 + V23 +      V24 + V25 + V26 + V27 + V28 + V29 + V30 
  Data: mydata 

Correlation Structure: corPagel
 Formula: ~Species.1 
 Parameter estimate(s):
   lambda 
0.1132853 

Coefficients:

 Correlation: 
                            (Intr) s(I((_ V22    V23    V24    V25    V26    V27    V28    V29   
scale(I(vlog(tas_breadth))) -0.030                                                               
V22                          0.060 -0.016                                                        
V23                         -0.033  0.002 -0.008                                                 
V24                         -0.031  0.016 -0.012  0.005                                          
V25                          0.015  0.020 -0.003 -0.014  0.004                                   
V26                          0.032  0.007  0.004 -0.015 -0.006  0.006                            
V27                         -0.018 -0.020  0.003  0.003  0.001 -0.010 -0.003                     
V28                          0.068  0.000  0.001  0.000  0.002  0.003 -0.017  0.000              
V29                         -0.038  0.001 -0.012  0.002  0.017 -0.001 -0.018  0.001  0.000       
V30                          0.031  0.024 -0.007 -0.013 -0.011  0.013  0.061 -0.015  0.001 -0.014

Standardized residuals:
       Min         Q1        Med         Q3        Max 
-3.0866866 -0.5483974  0.1080253  0.6419830  2.6354891 

Residual standard error: 0.8999846 
Degrees of freedom: 235 total; 224 residual
          [,1]
[1,] 0.5013446
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)

moran.test(residuals(m), nb2listw(nb))$p.value # too much.
[1] 0.9215695
hist(resid(m))

plot(m, resid(., type = "p") ~ fitted(.), abline = 0)

plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0,1))

qqnorm(m)


# Map each covariate V22-V30 used in the model above.
for (i in c("V22", "V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30")) {
  # Take the colour values from the same sorted rows as the coordinates.
  # `color = mydata[, i]` used the unsorted column against sorted data, so
  # each point was coloured with another pair's value.
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$plotval <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = plotval), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i)
  print(x)
}

Is thermal niche breadth predicted by latitude + latitude^2?


# vector 1 added ---
# GLS fit (REML) with a Pagel's-lambda phylogenetic correlation structure on `tree`:
# vlog(tas_breadth) regressed on latitude, latitude^2 and the covariates V22-V37
# (presumably spatial eigenvector filters -- TODO confirm).
# NOTE(review): the trailing cor() compares the fitted values with vlog(cost), but the
# response of this model is vlog(tas_breadth). This looks like a copy-paste leftover
# from the cost models, so the printed correlation is probably not the intended one -- confirm.
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)) + V22 + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31 + V32 + V33 + V34 + V35 + V36 + V37, correlation = corPagel(0.99, phy = tree,fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m); cor(predict(m),scale(I(vlog(mydata$cost))))
Generalized least squares fit by REML
  Model: scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)) +      V22 + V23 + V24 + V25 + V26 + V27 + V28 + V29 + V30 + V31 +      V32 + V33 + V34 + V35 + V36 + V37 
  Data: mydata 

Correlation Structure: corPagel
 Formula: ~Species.1 
 Parameter estimate(s):
   lambda 
0.5675592 

Coefficients:

 Correlation: 
                (Intr) scl(l) s(I(^2 V22    V23    V24    V25    V26    V27    V28    V29    V30    V31    V32    V33    V34    V35    V36   
scale(lat)       0.045                                                                                                                       
scale(I(lat^2)) -0.077  0.055                                                                                                                
V22              0.011 -0.056 -0.055                                                                                                         
V23              0.070  0.021  0.019  0.048                                                                                                  
V24              0.053  0.032 -0.058  0.076  0.005                                                                                           
V25             -0.024  0.005 -0.134  0.208  0.006  0.167                                                                                    
V26             -0.072 -0.037  0.034  0.040 -0.044 -0.007 -0.014                                                                             
V27             -0.007 -0.004 -0.078  0.050 -0.032 -0.022  0.090  0.028                                                                      
V28              0.028 -0.005 -0.006 -0.069 -0.003  0.037 -0.018 -0.057 -0.047                                                               
V29              0.002  0.011 -0.022 -0.006  0.002 -0.052  0.005  0.011  0.048 -0.004                                                        
V30             -0.038 -0.018 -0.018 -0.017 -0.009 -0.013 -0.005  0.030 -0.011 -0.008 -0.002                                                 
V31              0.005 -0.030 -0.009 -0.062 -0.030  0.021 -0.006 -0.046 -0.019  0.014 -0.023 -0.004                                          
V32              0.001 -0.005  0.012  0.022  0.004 -0.002 -0.021  0.004 -0.003 -0.002 -0.008 -0.011 -0.037                                   
V33             -0.008 -0.016 -0.070  0.043 -0.012 -0.014  0.026  0.012  0.050 -0.015  0.014  0.003 -0.007 -0.003                            
V34             -0.021  0.062  0.018 -0.018  0.010 -0.030 -0.030  0.026  0.022 -0.022  0.074  0.012 -0.027  0.010  0.036                     
V35             -0.005  0.012 -0.007 -0.006 -0.010 -0.013 -0.012 -0.001 -0.007 -0.001 -0.001  0.001 -0.020  0.002  0.003  0.004              
V36             -0.045 -0.014 -0.034  0.063 -0.007 -0.005  0.055  0.025  0.041 -0.022  0.021  0.008 -0.073 -0.028  0.013 -0.004 -0.003       
V37              0.027 -0.017 -0.021  0.004  0.017  0.013 -0.006  0.010 -0.006 -0.003 -0.015  0.002  0.008 -0.014 -0.005 -0.012  0.001 -0.008

Standardized residuals:
       Min         Q1        Med         Q3        Max 
-2.0731629 -0.6096382 -0.1790288  0.2623287  3.2865813 

Residual standard error: 0.5443762 
Degrees of freedom: 235 total; 216 residual
          [,1]
[1,] 0.1123255
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)

moran.test(residuals(m), nb2listw(nb))$p.value # too much.
[1] 0.9484634
hist(resid(m))

plot(m, resid(., type = "p") ~ fitted(.), abline = 0)

# Observed vs fitted for this model, whose response is vlog(tas_breadth), not cost.
plot(m, scale(I(vlog(tas_breadth))) ~ fitted(.), abline = c(0,1))

qqnorm(m)


# Map each covariate V22-V37 used in the model above.
for (i in c("V22", "V23", "V24", "V25", "V26", "V27", "V28", "V29", "V30", "V31", "V32", "V33", "V34", "V35", "V36", "V37")) {
  # Take the colour values from the same sorted rows as the coordinates.
  # `color = mydata[, i]` used the unsorted column against sorted data, so
  # each point was coloured with another pair's value.
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$plotval <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = plotval), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i) # title each page so the maps can be told apart
  print(x)
}

Is cost predicted by latitude + latitude^2?


# vector 1 added ---
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)) + V22 + V23 + V24 + V25, correlation = corPagel(0.99, phy = tree,fixed = F, form = ~Species.1), data = mydata, method = "REML"); summary(m); cor(predict(m),scale(I(vlog(mydata$cost))))
Generalized least squares fit by REML
  Model: scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)) + V22 + V23 +      V24 + V25 
  Data: mydata 

Correlation Structure: corPagel
 Formula: ~Species.1 
 Parameter estimate(s):
   lambda 
0.1048059 

Coefficients:

 Correlation: 
                (Intr) scl(l) s(I(^2 V22    V23    V24   
scale(lat)       0.001                                   
scale(I(lat^2)) -0.064  0.104                            
V22             -0.070 -0.017  0.013                     
V23             -0.041 -0.007 -0.006  0.011              
V24             -0.031  0.013  0.018  0.009  0.001       
V25              0.032  0.005 -0.013 -0.021 -0.008 -0.004

Standardized residuals:
       Min         Q1        Med         Q3        Max 
-3.2135868 -0.5833627  0.1685737  0.6876315  2.6281318 

Residual standard error: 0.9181122 
Degrees of freedom: 235 total; 228 residual
          [,1]
[1,] 0.4518531
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x=matx, y=distm, z=12, equidistant = T, plot = T)

moran.test(residuals(m), nb2listw(nb))$p.value # too much.
[1] 0.7738208
hist(resid(m))

plot(m, resid(., type = "p") ~ fitted(.), abline = 0)

plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0,1))

qqnorm(m)


# Map each covariate V22-V25 used in the model above.
for (i in c("V22", "V23", "V24", "V25")) {
  # Take the colour values from the same sorted rows as the coordinates.
  # `color = mydata[, i]` used the unsorted column against sorted data, so
  # each point was coloured with another pair's value.
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$plotval <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = plotval), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i) # title each page so the maps can be told apart
  print(x)
}

Sensitivity Analyses

# 1 Pair age (all v. < 8mya (end of uplift of Andes))
# 2 Distance (all v. < 1500*1000) (1500 / 110 = ~ 22 degrees)
# 3 MAT_overlap (> 0% v. > 75% (more restrictive == more conservative for this measure.))
# 4 landgap (all v. nogap) *ALL GAPS ARE < 110km (two water grid cells marked as land for having >50% land @ 0.5 degree resolution.)
---
title: "R Notebook"
output: html_notebook
---


Prepare the Workspace
```{r}
# Chunk defaults for the rendered notebook: hide warnings and messages, and
# halt on errors instead of printing them into the document.
knitr::opts_chunk$set(warning = FALSE, message = FALSE, error = FALSE)
# Empty the global environment, then run the garbage collector quietly.
rm(list = ls())
invisible(gc())
```
<br><br><br><br><br>

Control Block
```{r} 
# Analysis switches read by the filtering chunks below.
# f, f, 0.25 looks nice.
allowmigrants <- FALSE # OPTIONS: TRUE, FALSE
allowsympatry <- FALSE # OPTIONS: TRUE, FALSE
minoverperc <- 0 # remove pairs that do not have thermal overlap (anagenesis)
costvar <- "ele" # prefix used to build the cost-path column names (e.g. "<costvar>_c25")
```
<br><br><br><br><br>

Packages & Prefs
```{r}
# Print numbers in fixed rather than scientific notation.
options(scipen = 999)
# Infix complement of %in%: TRUE where the left-hand value is absent from the right-hand set.
`%notin%` <- function(x, table) {
  !(x %in% table)
}
require(ggplot2) # load packages
require(GGally)
require(viridis)
require(caper)
library(dplyr)
library(stringr)
library(maps)
library(ape)
library(EnvStats)
library(forecast)
require(nlme)
require(geodist)
require(letsR)
require(spdep)
require(spatialreg)
require(rnaturalearth)
require(rnaturalearthdata)
require(rgeos)
require(sf)
require(rgdal)
require(raster)
# Medium-scale world coastline as an sf object; used as the base layer for the maps in this notebook.
world <- ne_coastline(scale = "medium", returnclass = "sf")
# Shifted natural log.
# Adds |min(x)| + 1 to every element before taking the log, so vectors that
# contain zeros or negative values stay inside the domain of log().
# NA values are ignored when finding the minimum and stay NA in the result.
# Note that the shift is applied even when min(x) is already positive.
vlog <- function(x) {
  log(x + abs(min(x, na.rm = TRUE)) + 1)
}

# Read the exclusion table through its full path. A setwd() inside a notebook
# chunk is undone when the chunk finishes and makes knitr emit a warning.
exclusion_dir <- "~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data"
exclusion <- read.csv(file = file.path(exclusion_dir, "exclusion_nonsimpatric_nonmigrant.csv"))
# NA is north america not R's NA value: read.csv() parsed the realm code "NA"
# as missing, so put the literal string back in both realm columns.
exclusion$realm1red[is.na(exclusion$realm1red)] <- "NA"
exclusion$realm2green[is.na(exclusion$realm2green)] <- "NA"
```
<br><br><br><br><br>

Load Main Data
```{r}
# main dataframe ---------------------------------
# Load the pair-level table (object `mypairdata`) and keep it under the name `mydata`.
# NOTE(review): when run as a notebook, the setwd() below is reset at the end of this
# chunk, so relative paths in later chunks do not resolve against this directory.
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Process_Cluster_Outputs/Data")
load(file =  "Pair_Barrier_Data_FEB2021.rdata")
mydata <- mypairdata; rm(mypairdata) # continue under the name `mydata`; drop the loaded name
rownames(mydata) <- mydata$Species.1 # row names must be unique, so this errors if Species.1 has duplicates
mydatahold <- mydata # copy taken before any of the masks below are applied
```
<br><br><br><br><br>

initial masks
```{r}
# migration ---
# Unless migrants are allowed, keep only pairs whose Migration value equals 1
# (presumably the non-migratory category -- TODO confirm against the data dictionary).
if (!allowmigrants) {
  mydata <- mydata[which(mydata$Migration == 1.0), ]
}

# patry
# Unless sympatric pairs are allowed, keep only pairs with a positive "<costvar>_c0" value.
if (!allowsympatry) {
  mydata <- mydata[which(mydata[, paste0(costvar, "_c0")] > 0), ] # doesnt matter if you use ele, mat, vart paths here, will be same answer
}
```

sort
```{r}
# Put mydata into the row order of the exclusion table, so the two tables can
# be combined position by position in the next chunk.
pair_pos <- match(mydata$uniquePairId, exclusion$mydata.uniquePairId)

# Check the alignment explicitly instead of printing long logical vectors to
# be inspected by eye; a silent mismatch would attach realms to the wrong pairs.
if (anyNA(pair_pos)) {
  stop("some pairs in mydata are missing from the exclusion table")
}
if (nrow(mydata) != nrow(exclusion)) {
  stop("mydata and the exclusion table have different numbers of rows")
}

mydata <- mydata[order(pair_pos), ]

if (!isTRUE(all(mydata$uniquePairId == exclusion$mydata.uniquePairId))) {
  stop("mydata and the exclusion table are still not in the same row order")
}
rm(pair_pos)
```



```{r}
# # Basic range maps for all pairs (no paths)
# wdPAM <- "/Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data"
# setwd(wdPAM); load("cbPAM.rdata")
# setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Process_Cluster_Outputs/Data")
# pdf("pairmaps2.pdf", width = 19, height = 9.25)
# for (i in 1:nrow(mydata)){
# x <- cbPAM[,c("Longitude(x)","Latitude(y)",mydata$Species.1bl[i])]
# x <- as.data.frame(x[x[,3] == 1,])
# if(ncol(x) == 1) {
#   x <- t(x)
#   colnames(x) <- c("lon", "lat", "pres")
#   x <- as.data.frame(x)
# } else {
#   colnames(x) <- c("lon", "lat", "pres")
# }
# 
# y <- cbPAM[,c("Longitude(x)","Latitude(y)",mydata$Species.2bl[i])]
# y <- as.data.frame(y[y[,3] == 1,])
# if(ncol(y) == 1) {
#   y <- t(y)
#   colnames(y) <- c("lon", "lat", "pres")
#   y <- as.data.frame(y)
# 
# } else {
#   colnames(y) <- c("lon", "lat", "pres")
# }
# z <- ggplot(world)+
#     geom_sf() +
#     geom_point(data = x, aes(y=lat, x=lon), color = "red") +
#     geom_point(data = y, aes(y=lat, x=lon), color = "green") +
#     theme_bw() +
#     ggtitle(i)
# print(z)
# }
# print(i)
# dev.off()
```

```{r}
# Side-by-side copy of both tables, written to the current working directory.
mydata_exclusion <- cbind(mydata, exclusion)
save(mydata_exclusion, file = "mydata_exclusion.rdata")

# Realm of each species in the pair, and the concatenated two-realm label.
mydata$realm1 <- exclusion$realm1red
mydata$realm2 <- exclusion$realm2green
mydata$realm <- paste0(mydata$realm1, mydata$realm2)
table(mydata$realm)

# Collapse mirrored labels (e.g. "IMAA" and "AAIM") onto one spelling.
# names = spelling to replace, values = spelling to keep.
realm_aliases <- c(
  IMAA = "AAIM",
  IMAT = "ATIM",
  PAAT = "ATPA",
  NTIM = "IMNT",
  AANT = "NTAA",
  NANT = "NTNA",
  NAPA = "PANA"
)
for (reversed in names(realm_aliases)) {
  canonical <- realm_aliases[[reversed]]
  print(mydata$realm[mydata$realm == canonical]) # rows already carrying the kept spelling
  mydata$realm[mydata$realm == reversed] <- canonical
}
rm(realm_aliases, reversed, canonical)

# Factor with "NTNT" as the reference level.
mydata$realm <- relevel(as.factor(mydata$realm), "NTNT")

# Logical flags copied from the exclusion table.
mydata$landgap <- as.logical(exclusion$island)
mydata$cosmopolitan <- as.logical(exclusion$cosmopolitan)
mydata$new.old <- as.logical(exclusion$new.old)

rm(exclusion, mydata_exclusion)
```



Impose masks & do calculations
```{r}
# filter cosmopolitan and new/old world species (there are relatively few after imposing previous masks.)
# which() also drops pairs whose flags are NA.
mydata <- mydata[which(!mydata$cosmopolitan & !mydata$new.old), ]

# dependent variable: elevational barrier size ---
mydata$cost <- mydata[, paste0(costvar, "_c25")]

# data filtering -----------------------
#  thermal overlap ---
mydata$MAT_overlap <- mydata[, "MAT_ov_perc_smrnge"]
# which() drops pairs with missing overlap; plain logical indexing would turn
# each of them into a row made entirely of NA.
mydata <- mydata[which(mydata$MAT_overlap > minoverperc), ]

# update sort order --------------------
mydata$sortorder <- seq_len(nrow(mydata)) # seq_len() is also correct for zero rows

# longitude ----------------------------
mydata$lon <- mydata[, paste0("lon_mean_pair_", costvar, "_c25")]

# latitude ----------------------------
mydata$lat <- mydata[, paste0("lat_mean_pair_", costvar, "_c25")]

# temperature breadth -----------------
mydata$tas_breadth <- mydata$tas_range # mean(mean(sp1 annual tas range -- one value per cell), mean(sp2 annual tas range -- one value per cell))

# mean annual temperature --------------
mydata$tas_position <- mydata$tas_mean # mean(mean(sp1 annual tas mean  -- one value per cell), mean(sp2 annual tas mean  -- one value per cell))

# precipitation breadth ---------------
mydata$pcp_breadth <- mydata$pcp_range # mean(mean(sp1 annual pcp range  -- one value per cell), mean(sp2 annual pcp range  -- one value per cell))

# mean annual precipitation ------------
mydata$pcp_position <- mydata$pcp_mean # mean(mean(sp1 annual pcp mean  -- one value per cell), mean(sp2 annual pcp mean  -- one value per cell))

# distance -----------------------------
mydata$distance <- mydata[, paste0("centroid_distance_", costvar, "_c25")]

# mountain mass ------------------------
# Share of raster cells flagged as mountain inside a fixed-radius buffer
# around each pair's mean location (lon, lat).
mtns <- readOGR(dsn="~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/GMBA", layer="GMBA Mountain Inventory_v1.2-World", verbose = FALSE)
wdPAM <- "/Users/boterolab1/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/PREP/PAM/Data"
setwd(wdPAM)
load("LonLat_BirdPAM_raster.rdata")
mtns <- rasterize(mtns, LonLat_BirdPAM_raster)
mtns@data@values[!is.na(mtns@data@values)] <- 1 # replace mountain IDs with simple coding. 1 for mountain...
mtns@data@values[is.na(mtns@data@values)] <- 0 # ...0 for no mountain

# Buffer radius: distance between (0, 0) and (0, 8) in lon/lat, i.e. 8 degrees of latitude.
# It does not change between pairs, so it is computed once.
buffer_radius <- raster::pointDistance(c(0,0), c(0,8), lonlat = TRUE)

mydata$mtn_mass <- NA
for (i in seq_len(nrow(mydata))) {
  # single-point spatial object at the pair's location, in the raster's CRS
  coords1 <- data.frame(lon = mydata$lon[i], lat = mydata$lat[i])
  coordinates(coords1) <- c("lon", "lat")
  crs(coords1) <- crs(LonLat_BirdPAM_raster)
  z <- extract(mtns, coords1, buffer = buffer_radius)
  mydata$mtn_mass[i] <- sum(z[[1]]) / length(z[[1]]) # fraction of buffered cells coded 1
}

raster::pointDistance(c(0,0), c(0,8), lonlat = TRUE) # corresponds to a radius of just about 8 degrees.
raster::pointDistance(c(75,75), c(75,(75+8)), lonlat = TRUE) # polar circles are bigger, but not that much bigger. so should be OK.
raster::plot(mtns)
raster::plot(world$geometry, add = TRUE)
# example circles of radius 8 (map units) drawn at four locations
plotrix::draw.circle(70, 40, 8, nv = 1000, border = "hotpink", lty = 1, lwd = 1)
plotrix::draw.circle(140, -5, 8, nv = 1000, border = "hotpink", lty = 1, lwd = 1)
plotrix::draw.circle(-80, 10, 8, nv = 1000, border = "hotpink", lty = 1, lwd = 1)
plotrix::draw.circle(-70, -50, 8, nv = 1000, border = "hotpink", lty = 1, lwd = 1)

x <- ggplot(world) +
  geom_sf() +
  geom_point(data = mydata[order(mydata[, "mtn_mass"], decreasing = FALSE), ], aes(y = lat, x = lon, color = mtn_mass), alpha = 0.9) +
  ggtitle("mtn_mass") + # was ggtitle(i): i is the leftover loop index, not a variable name
  scale_color_viridis()
print(x)

# water buffering ----------------------
wtr <- read_sf("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/ne_50m_ocean/ne_50m_ocean.shp")
temp <- mtns
values(temp) <- 100
temp <- mask(temp, wtr)
wtr <- mtns
values(wtr) <- 0
wtr[temp == 100] <- 1; rm(temp)
plot(wtr)

# For each pair, summarise the water raster inside a circular buffer around the
# pair's (lon, lat) point:
#   miniplot - number of raster cells in the buffer
#   wtrcells - number of those cells flagged as water (value 1)
#   wtrmass  - wtrcells / miniplot, i.e. proportion of water cells
# All three columns are initialised up front; `miniplot` was previously created
# implicitly by the first element-wise assignment inside the loop.
mydata$miniplot <- NA
mydata$wtrcells <- NA
mydata$wtrmass <- NA

# Buffer radius: distance between (0, 0) and (0, 8); constant across rows.
wtr_buffer <- raster::pointDistance(c(0, 0), c(0, 8), lonlat = TRUE)

for (i in seq_len(nrow(mydata))) { # seq_len() is a no-op when mydata has no rows
  # One-point spatial object in the PAM raster's CRS.
  coords1 <- data.frame(lon = mydata$lon[i], lat = mydata$lat[i])
  coordinates(coords1) <- c("lon", "lat")
  crs(coords1) <- crs(LonLat_BirdPAM_raster)
  z <- extract(wtr, coords1, buffer = wtr_buffer)
  mydata$miniplot[i] <- length(z[[1]])
  mydata$wtrcells[i] <- sum(z[[1]])
  mydata$wtrmass[i] <- sum(z[[1]]) / length(z[[1]])
}
mydata$water_buffering <- mydata$wtrmass

# Map of water_buffering per pair. Rows are sorted ascending so the highest
# values are drawn last (on top). The title names the variable; the previous
# ggtitle(i) printed the final value of the loop index.
x <- ggplot(world) +
  geom_sf() +
  geom_point(data = mydata[order(mydata[, "water_buffering"], decreasing = FALSE), ],
             aes(y = lat, x = lon, color = water_buffering), alpha = 0.9) +
  ggtitle("water_buffering") +
  scale_color_viridis()
print(x)


# dispersal ability --------------------
# Pair-level dispersal ability = mean hand-wing index (HWI) over every HWI
# record whose IUCN name equals either species of the pair.
dispab <- read.csv("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/Bird Hand-Wing Index/Dataset HWI 2020-04-10.csv")

# HWI values for one species name. which() drops NA comparisons, so a missing
# name yields an empty vector rather than a vector of NA-indexed lookups.
hwi_for <- function(sp) {
  dispab$HWI[which(dispab$IUCN.name == sp)]
}

# seq_len() keeps this safe when mydata has no rows (1:nrow() would yield 1, 0).
mydata$dispersal_ability <- vapply(
  seq_len(nrow(mydata)),
  function(r) {
    mean(c(hwi_for(mydata$Species.1bl[r]), hwi_for(mydata$Species.2bl[r])),
         na.rm = TRUE)
  },
  numeric(1)
)
rm(hwi_for)

# mean() of nothing is NaN: drop pairs with no usable HWI for either species.
mydata <- mydata[!is.nan(mydata$dispersal_ability), ]

# pair age -----------------------------
# Copy the age column under a shorter, analysis-friendly name.
mydata[["pair_age"]] <- mydata[["Pair.age..MY."]]

# length of boundary -------------------
# Use the "ele_c25" variant of the boundary length.
mydata[["boundary_length"]] <- mydata[["boundary_length_ele_c25"]]

# retain cols of interest only.
mydata <- mydata[, c(
  # identifiers
  "uniquePairId", "Species.1", "Species.2", "Species.1bl", "Species.2bl",
  # response and location
  "cost", "lat", "lon",
  # climatic niche
  "tas_breadth", "tas_position", "pcp_breadth", "pcp_position",
  # landscape and traits
  "mtn_mass", "water_buffering", "dispersal_ability", "pair_age",
  "distance", "boundary_length", "MAT_overlap", "realm",
  "landgap"
)]
```

load phylo and prune
```{r}
# phylo ------------------------------------------
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Other_Input_Data/BirdTrees")
load(file = "BirdTrees.Rdata")

# pick tree (VF GET TREES FROM COONEY!!! and use MCC tree.) -- currently just using tree 1 here.
tree <- trees[[1]]
rm(trees)

# initial name matching: drop every tip that is not a Species.1 in the data...
tree <- drop.tip(tree, setdiff(tree$tip.label, mydata$Species.1))
# ...keep only the data rows whose Species.1 is still a tip...
mydata <- mydata[mydata$Species.1 %in% tree$tip.label, ]
# ...and put the rows in tip-label order.
mydata <- mydata[match(tree$tip.label, mydata$Species.1), ]

# Number of rows whose Species.1 differs from the tip at the same position.
# sorted (but still specify form below for safety)
sum(mydata$Species.1 != tree$tip.label)
```

Save data frame for CB
```{r}
# Write the pruned, tip-ordered analysis frame to the shared Data folder, both
# as a CSV and as an R workspace file containing the object `mydata`.
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data")
write.csv(x = mydata, file = "ele_data_for_CB.csv")
save(list = "mydata", file = "ele_data_for_CB.rdata")
```



Neighbors for spatial analysis.
```{r}
# Pairwise geodesic distances between pair locations, labelled by row name.
distm <- geodist(mydata[, c("lon", "lat")], measure = "geodesic")
dimnames(distm) <- list(rownames(mydata), rownames(mydata))

# Number of "real" columns; spatial-filter columns are appended after these and
# stripped again with mydata[, 1:basecols].
basecols <- ncol(mydata)

# neighbors
coords <- as.matrix(cbind(mydata$lon, mydata$lat))
row.names(coords) <- rownames(mydata)

# Nearest-neighbour graph (knearneigh's default number of neighbours) ...
k1 <- knn2nb(knearneigh(coords, longlat = TRUE))
# ... whose longest link becomes the upper distance threshold for the
# distance-based neighbour list.
max_nn_dist <- max(unlist(nbdists(k1, coords, longlat = TRUE)))
nb <- dnearneigh(coords, d1 = 0, d2 = max_nn_dist,
                 row.names = row.names(coords), longlat = TRUE)
rm(max_nn_dist)
```


plots for transformations
```{r}
# One 50-break histogram per numeric variable, to judge which ones need
# transforming before modelling.
for (i in c(
  "cost", "lat", "lon",
  "tas_breadth", "tas_position", "pcp_breadth", "pcp_position",
  "mtn_mass", "dispersal_ability", "pair_age",
  "distance", "boundary_length", "MAT_overlap"
)) {
  hist(mydata[[i]], main = i, breaks = 50)
}
```

world plots
```{r}
setwd("~/Box Sync/CB_VF_Shared/Dry_Lab/Projects/JMPH/Analyze_Processed_Cluster_Outputs/Data")
# pdf(file="variable_maps.pdf", width = 10, height = 7)

# One world map per variable, points coloured by that variable.
# Points are always drawn in order of increasing cost, so high-cost pairs end
# up on top. The colour values are stored as a column of the *sorted* frame:
# previously the cost map took its colours from the unsorted data while the
# layer data were sorted, so colours were attached to the wrong points.
plotdat <- mydata[order(mydata[, "cost"], decreasing = FALSE), ]

for (i in c("cost", "lat", "lon", "tas_breadth", "tas_position", "pcp_breadth", "pcp_position",
            "mtn_mass", "water_buffering", "dispersal_ability", "pair_age", "distance",
            "boundary_length", "MAT_overlap", "realm", "landgap")) {
  is_cost <- i %in% c("cost")                  # cost is shown on the vlog scale
  is_discrete <- i %in% c("realm", "landgap")  # these use a discrete palette

  plotdat$plotval <- if (is_cost) vlog(plotdat[, i]) else plotdat[, i]

  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = plotval), alpha = 0.85) +
    ggtitle(if (is_cost) paste0("ln ", i) else i) +
    labs(color = i) +
    scale_color_viridis(discrete = is_discrete)
  print(x)
}
rm(plotdat, is_cost, is_discrete)
# dev.off()
```


```{r}
# Drop any spatial-filter columns appended by an earlier chunk.
mydata <- mydata[, seq_len(basecols)]

# no spatial filtering ---------------------------------------------------------
# Phylogenetic GLS of transformed cost on transformed thermal niche breadth,
# with Pagel's lambda estimated (start value 0.99, not fixed).
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))),
         correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
         data = mydata, method = "REML")
summary(m)
print(paste("Correlation between data and prediction:  ", cor(predict(m), scale(I(vlog(mydata$cost))))))

# Diagnostics: residual histogram, residuals vs fitted, observed vs fitted, QQ.
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)

# plot ---------------------------------
plot(scale(I(vlog(mydata$tas_breadth))), scale(I(vlog(mydata$cost))), pch = 16,
     xlab = "thermal niche breadth", ylab = paste0("log ", costvar, " cost"), main = "Global model")

# x grid on the same scale(vlog()) axis as the scatter; shared by both fits.
myx <- seq(min(scale(vlog(mydata$tas_breadth))), max(scale(vlog(mydata$tas_breadth))), length.out = 100)

# Solid red: phylogenetic fit.
mycoefs <- coef(m)
myy <- mycoefs[1] + mycoefs[2] * myx
lines(c(0, 0), c(-10, 100), lty = 2) # vertical reference line at x = 0
lines(myx, myy, col = "red")

# Dashed red: the same model without the phylogenetic correlation structure.
# The predictor uses vlog() like the axis and the model above; it previously
# used log(), so the line was drawn against a differently transformed x.
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))), data = mydata, method = "REML")
mycoefs <- coef(m)
myy <- mycoefs[1] + mycoefs[2] * myx
lines(myx, myy, col = "red", lty = 2)




# with spatial filtering -------------------------------------------------------
# Refit the phylogenetic model (the plotting code above overwrote `m`).
base_fml <- scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth)))
m <- gls(scale(I(vlog(cost))) ~ scale(I(vlog(tas_breadth))),
         correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
         data = mydata, method = "REML")

# spatial autocorr ---------------------
# Correlogram of the residuals over 12 distance classes plus Moran's test on
# the neighbour list.
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # no evidence of spatial component..

# spatial filtering --------------------
# Clear a stale result first so a failed run cannot silently reuse it.
if (exists("sarcol")) rm(sarcol)
sarcol <- SpatialFiltering(formula = base_fml,
                           data = mydata, nb = nb, style = "W", ExactEV = TRUE)

# Append the selected eigenvectors as columns V<basecols+1>, V<basecols+2>, ...
# (the names data.frame would auto-assign). The names are derived from the
# result instead of being hard-coded, so the model below always uses exactly
# the vectors that were selected, including the case where none were.
sf_vecs <- fitted(sarcol)
n_vec <- if (is.matrix(sf_vecs)) ncol(sf_vecs) else 0L
sf_names <- paste0("V", basecols + seq_len(n_vec))
for (k in seq_along(sf_names)) {
  mydata[[sf_names[k]]] <- sf_vecs[, k]
}
colnames(mydata)

# spatial eigenvectors added ---
sf_fml <- base_fml
if (length(sf_names) > 0) {
  sf_fml <- update(base_fml, paste(". ~ . +", paste(sf_names, collapse = " + ")))
}
# bquote() splices the formula into the call so summary() shows the real model.
m <- eval(bquote(gls(.(sf_fml),
                     correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
                     data = mydata, method = "REML")))
summary(m)
cor(predict(m), scale(I(vlog(mydata$cost))))
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # too much.
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)

# Map each eigenvector. The colour is a column of the sorted frame; it was
# previously taken from the unsorted data while the points were sorted, which
# attached colours to the wrong points.
for (i in sf_names) {
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$sf_value <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = sf_value), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i)
  print(x)
}
```


Is thermal niche breadth predicted by latitude plus latitude squared?
```{r}
# Drop any spatial-filter columns appended by an earlier chunk.
mydata <- mydata[, seq_len(basecols)]

# Phylogenetic GLS of transformed thermal niche breadth on latitude and
# latitude squared (each scaled separately), Pagel's lambda estimated.
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)),
         correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
         data = mydata, method = "REML")
summary(m)
print(paste("Correlation between data and prediction:  ", cor(predict(m), scale(I(vlog(mydata$tas_breadth))))))
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
# Observed vs fitted for THIS model's response (was plotted against cost).
plot(m, scale(I(vlog(tas_breadth))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)

# plot ---------------------------------
plot(scale(mydata$lat), scale(I(vlog(mydata$tas_breadth))), pch = 16,
     xlab = "latitude", ylab = "thermal niche breadth", main = "Global model")

# Prediction grid. The model's two predictors are scale(lat) and scale(lat^2),
# each centred and scaled with its own mean/sd, so the quadratic term has to be
# rebuilt from raw latitude; squaring the scaled latitude is a different
# quantity and gave the wrong curve.
lat_seq <- seq(min(mydata$lat), max(mydata$lat), length.out = 100)
myx <- (lat_seq - mean(mydata$lat)) / sd(mydata$lat)          # scale(lat)
myx2 <- (lat_seq^2 - mean(mydata$lat^2)) / sd(mydata$lat^2)   # scale(lat^2)

# Solid red: phylogenetic fit.
mycoefs <- coef(m)
myy <- mycoefs[1] + mycoefs[2] * myx + mycoefs[3] * myx2
lines(c(0, 0), c(-10, 100), lty = 2) # vertical reference line at x = 0
lines(myx, myy, col = "red")

# Dashed red: the same model without the phylogenetic correlation structure.
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)), data = mydata, method = "REML")
mycoefs <- coef(m)
myy <- mycoefs[1] + mycoefs[2] * myx + mycoefs[3] * myx2
lines(myx, myy, col = "red", lty = 2)






# with spatial filtering -------------------------------------------------------
# Refit the phylogenetic model (the plotting code above overwrote `m`).
base_fml <- scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2))
m <- gls(scale(I(vlog(tas_breadth))) ~ scale(lat) + scale(I(lat^2)),
         correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
         data = mydata, method = "REML")

# spatial autocorr ---------------------
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # no evidence of spatial component..

# spatial filtering --------------------
# Clear a stale result first so a failed run cannot silently reuse it.
if (exists("sarcol")) rm(sarcol)
sarcol <- SpatialFiltering(formula = base_fml,
                           data = mydata, nb = nb, style = "W", ExactEV = TRUE)

# Append the selected eigenvectors as V<basecols+1>, V<basecols+2>, ... and
# remember their names, so the model and maps below follow whatever number of
# vectors was selected instead of a hard-coded V22..V37 list.
sf_vecs <- fitted(sarcol)
n_vec <- if (is.matrix(sf_vecs)) ncol(sf_vecs) else 0L
sf_names <- paste0("V", basecols + seq_len(n_vec))
for (k in seq_along(sf_names)) {
  mydata[[sf_names[k]]] <- sf_vecs[, k]
}
colnames(mydata)

# spatial eigenvectors added ---
sf_fml <- base_fml
if (length(sf_names) > 0) {
  sf_fml <- update(base_fml, paste(". ~ . +", paste(sf_names, collapse = " + ")))
}
# bquote() splices the formula into the call so summary() shows the real model.
m <- eval(bquote(gls(.(sf_fml),
                     correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
                     data = mydata, method = "REML")))
summary(m)
# Fit vs this model's response, tas_breadth (was compared against cost).
cor(predict(m), scale(I(vlog(mydata$tas_breadth))))
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # too much.
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(tas_breadth))) ~ fitted(.), abline = c(0, 1)) # response was cost
qqnorm(m)

# Map each eigenvector; colour comes from the sorted frame so it stays aligned
# with the points (it was previously taken from the unsorted data).
for (i in sf_names) {
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$sf_value <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = sf_value), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i)
  print(x)
}
```

Is cost predicted by latitude plus latitude squared?
```{r}
# Drop any spatial-filter columns appended by an earlier chunk.
mydata <- mydata[, seq_len(basecols)]

# Phylogenetic GLS of transformed cost on latitude and latitude squared (each
# scaled separately), Pagel's lambda estimated.
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)),
         correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
         data = mydata, method = "REML")
summary(m)
print(paste("Correlation between data and prediction:  ", cor(predict(m), scale(I(vlog(mydata$cost))))))
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)

# plot ---------------------------------
plot(scale(mydata$lat), scale(I(vlog(mydata$cost))), pch = 16,
     xlab = "latitude", ylab = "cost", main = "Global model")

# Prediction grid. The model's two predictors are scale(lat) and scale(lat^2),
# each centred and scaled with its own mean/sd, so the quadratic term has to be
# rebuilt from raw latitude; squaring the scaled latitude is a different
# quantity and gave the wrong curve.
lat_seq <- seq(min(mydata$lat), max(mydata$lat), length.out = 100)
myx <- (lat_seq - mean(mydata$lat)) / sd(mydata$lat)          # scale(lat)
myx2 <- (lat_seq^2 - mean(mydata$lat^2)) / sd(mydata$lat^2)   # scale(lat^2)

# Solid red: phylogenetic fit.
mycoefs <- coef(m)
myy <- mycoefs[1] + mycoefs[2] * myx + mycoefs[3] * myx2
lines(c(0, 0), c(-10, 100), lty = 2) # vertical reference line at x = 0
lines(myx, myy, col = "red")

# Dashed red: the same model without the phylogenetic correlation structure.
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)), data = mydata, method = "REML")
mycoefs <- coef(m)
myy <- mycoefs[1] + mycoefs[2] * myx + mycoefs[3] * myx2
lines(myx, myy, col = "red", lty = 2)






# with spatial filtering -------------------------------------------------------
# Refit the phylogenetic model (the plotting code above overwrote `m`).
base_fml <- scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2))
m <- gls(scale(I(vlog(cost))) ~ scale(lat) + scale(I(lat^2)),
         correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
         data = mydata, method = "REML")

# spatial autocorr ---------------------
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # no evidence of spatial component..

# spatial filtering --------------------
# Clear a stale result first so a failed run cannot silently reuse it.
if (exists("sarcol")) rm(sarcol)
sarcol <- SpatialFiltering(formula = base_fml,
                           data = mydata, nb = nb, style = "W", ExactEV = TRUE)

# Append the selected eigenvectors as V<basecols+1>, V<basecols+2>, ... and
# remember their names, so the model and maps below follow whatever number of
# vectors was selected instead of a hard-coded V22..V25 list.
sf_vecs <- fitted(sarcol)
n_vec <- if (is.matrix(sf_vecs)) ncol(sf_vecs) else 0L
sf_names <- paste0("V", basecols + seq_len(n_vec))
for (k in seq_along(sf_names)) {
  mydata[[sf_names[k]]] <- sf_vecs[, k]
}
colnames(mydata)

# spatial eigenvectors added ---
sf_fml <- base_fml
if (length(sf_names) > 0) {
  sf_fml <- update(base_fml, paste(". ~ . +", paste(sf_names, collapse = " + ")))
}
# bquote() splices the formula into the call so summary() shows the real model.
m <- eval(bquote(gls(.(sf_fml),
                     correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
                     data = mydata, method = "REML")))
summary(m)
cor(predict(m), scale(I(vlog(mydata$cost))))
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # too much.
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)

# Map each eigenvector; colour comes from the sorted frame so it stays aligned
# with the points (it was previously taken from the unsorted data).
for (i in sf_names) {
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$sf_value <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = sf_value), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i)
  print(x)
}
```


```{r}
# Full model: transformed cost against all scaled predictors plus the landgap
# and realm terms, with Pagel's lambda estimated on the tree.
mydata <- mydata[, seq_len(basecols)] # strip spatial-filter columns from earlier chunks

m <- gls(
  scale(I(vlog(cost))) ~
    scale(I((tas_breadth))) + scale(I((tas_position))) +
    scale(I((pcp_breadth))) + scale(I((pcp_position))) +
    scale(I((mtn_mass))) + scale(I((water_buffering))) +
    scale(I((dispersal_ability))) + scale(I((pair_age))) +
    scale(I((distance))) + scale(I((boundary_length))) +
    scale(I((MAT_overlap))) + landgap + realm,
  correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
  data = mydata,
  method = "REML"
)
summary(m)
# car::vif(m) # does not work with realm added in.

# Agreement between fitted values and the transformed response.
fit_cor <- cor(predict(m), scale(I(vlog(mydata$cost))))
print(paste("Correlation between data and prediction:  ", fit_cor))
rm(fit_cor)

# Diagnostics: residual histogram, residuals vs fitted, observed vs fitted, QQ.
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)


# spatial autocorr ---------------------
# `m` is the full phylogenetic model fitted just above in this chunk.
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # no evidence of spatial component..

# spatial filtering --------------------
# The full-model formula, written once and reused for the filter and the refit.
full_fml <- scale(I(vlog(cost))) ~ scale(I((tas_breadth))) + scale(I((tas_position))) +
  scale(I((pcp_breadth))) + scale(I((pcp_position))) +
  scale(I((mtn_mass))) + scale(I((water_buffering))) + scale(I((dispersal_ability))) +
  scale(I((pair_age))) + scale(I((distance))) + scale(I((boundary_length))) +
  scale(I((MAT_overlap))) + landgap + realm

# Clear a stale result first so a failed run cannot silently reuse it.
if (exists("sarcol")) rm(sarcol)
sarcol <- SpatialFiltering(formula = full_fml,
                           data = mydata, nb = nb, style = "W", ExactEV = TRUE)

# Append the selected eigenvectors as V<basecols+1>, V<basecols+2>, ... and
# remember their names, so the model and maps below follow whatever number of
# vectors was selected instead of a hard-coded V22..V33 list.
sf_vecs <- fitted(sarcol)
n_vec <- if (is.matrix(sf_vecs)) ncol(sf_vecs) else 0L
sf_names <- paste0("V", basecols + seq_len(n_vec))
for (k in seq_along(sf_names)) {
  mydata[[sf_names[k]]] <- sf_vecs[, k]
}
colnames(mydata)

# spatial eigenvectors added ---
# Use "NT" as the reference level of the realm term. This previously releveled
# `realm2`, a column that is not kept in mydata and is not in the model.
# NOTE(review): assumes "NT" is one of the values of `realm` - confirm.
mydata$realm <- relevel(as.factor(mydata$realm), ref = "NT")

sf_fml <- full_fml
if (length(sf_names) > 0) {
  sf_fml <- update(full_fml, paste(". ~ . +", paste(sf_names, collapse = " + ")))
}
# bquote() splices the formula into the call so summary() shows the real model.
m <- eval(bquote(gls(.(sf_fml),
                     correlation = corPagel(0.99, phy = tree, fixed = FALSE, form = ~Species.1),
                     data = mydata, method = "REML")))
summary(m)
cor(predict(m), scale(I(vlog(mydata$cost))))
matx <- as.matrix(m$residuals); rownames(matx) <- rownames(mydata)
spac <- lets.correl(x = matx, y = distm, z = 12, equidistant = TRUE, plot = TRUE)
moran.test(residuals(m), nb2listw(nb))$p.value # too much.
hist(resid(m))
plot(m, resid(., type = "p") ~ fitted(.), abline = 0)
plot(m, scale(I(vlog(cost))) ~ fitted(.), abline = c(0, 1))
qqnorm(m)


# Map each eigenvector; colour comes from the sorted frame so it stays aligned
# with the points (it was previously taken from the unsorted data).
for (i in sf_names) {
  plotdat <- mydata[order(mydata[, i]), ]
  plotdat$sf_value <- plotdat[, i]
  x <- ggplot(world) +
    geom_sf() +
    geom_point(data = plotdat, aes(y = lat, x = lon, color = sf_value), alpha = 0.9) +
    scale_color_viridis() +
    labs(color = i) +
    ggtitle(i)
  print(x)
}
```


Sensitivity Analyses
```{r}
# 1 Pair age (all v. < 8mya (end of uplift of Andes))
# 2 Distance (all v. < 1500*1000) (1500 / 110 = ~ 22 degrees)
# 3 MAT_overlap (> 0% v. > 75% (more restrictive == more conservative for this measure.))
# 4 landgap (all v. nogap) *ALL GAPS ARE < 110km (two water grid cells marked as land for having >50% land @ 0.5 degree resolution.)
```